Pré-requisitos

Pacotes necessários:

#if(!require(PSF)) install.packages("PSF")
#if(!require(timetk)) remotes::install_github("business-science/timetk")
if(!require(EflowStats)) remotes::install_github("USGS-R/EflowStats")


pacotes <- c(
  "here",
  "usethis",
  "data.table",
  "HEobs",
#  "PSF",
  "tidyverse",
  "lubridate",
  "fs",
  "checkmate",
#  "xts",
#  "hydroGOF",
#  "ModelMetrics",
#  "forecast",
#  "timetk",
  "EflowStats",
  "hydrostats",
 #"NbClust",
 #  "cluster",  
 #  "cluster.datasets", 
  "cowplot", 
  # "clValid",
  "ggfortify", 
  #"clustree",
  #"dendextend",
  #"factoextra",
  #"FactoMineR",
  #"corrplot",
  #"GGally",
  #"ggiraphExtra",
  "kableExtra",
  "tidymodels",
  "fasstr"
)
# Carregar os pacotes
easypackages::libraries(pacotes)

Scripts:

source(here('R', 'load-data.R'))
source(here('R', 'utils.R'))
source(here('R', 'prep-stn-data.R'))

Dados de vazão

Os dados importados de vazão devem ser regularmente espaçados no tempo. Esta adequação das séries diárias, se necessária, pode ser realizada com a função complete_dates() do pacote {lhmetools}. Assim assegura-se que todas estações possuam 1 observação por dia e sem datas faltantes.

Metadados

data_link <- "https://www.dropbox.com/s/d40adhw66uwueet/VazoesNaturaisONS_D_87UHEsDirceuAssis_2018.dat?dl=1"
qnat_meta <- extract_metadata(NA_character_, informative = TRUE)
glimpse(qnat_meta)
## Rows: 87
## Columns: 5
## $ estacao_codigo <dbl> 18, 237, 215, 119, 190, 32, 14, 247, 1, 216, 191, 149, …
## $ latitude       <dbl> -19.918751, -22.636078, -27.776389, -23.811460, -6.7985…
## $ longitude      <dbl> -49.92053, -48.27274, -51.18944, -46.52390, -43.92756, …
## $ nome_estacao   <chr> "A. VERMELHA", "BARRA BONITA", "BARRA GRANDE", "BILLING…
## $ municipio      <chr> "A. VERMELHA", "BARRA BONITA", "BARRA GRANDE", "BILLING…

Dados

qnat_data <- qnat_dly_ons() %>%
  select(date, qnat, code_stn) %>%
  lhmetools::complete_dates(group = "code_stn")
glimpse(qnat_data)
## Rows: 2,796,267
## Columns: 3
## $ date     <date> 1931-01-02, 1931-01-03, 1931-01-04, 1931-01-05, 1931-01-06, …
## $ code_stn <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ qnat     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
# Incluindo nome da estacao
qnat_data <- qnat_data %>%
  full_join(select(qnat_meta, estacao_codigo, nome_estacao),
            by = c(code_stn = "estacao_codigo")
            ) %>%
  rename("name_stn" = nome_estacao)

Assinaturas hidrológicas para 1 posto

Preparação dos dados para aplicação da função calc_magnifSeven com a opção de ano hidrológico (water year) para o posto 74 da ONS (G. B. Munhoz).

qnat_posto <- qnat_data %>% 
  sel_station(.,station = 74)  
glimpse(qnat_posto)
## Rows: 32,141
## Columns: 4
## $ date     <date> 1931-01-02, 1931-01-03, 1931-01-04, 1931-01-05, 1931-01-06, …
## $ code_stn <dbl> 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 7…
## $ qnat     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ name_stn <chr> "G.B. MUNHOZ", "G.B. MUNHOZ", "G.B. MUNHOZ", "G.B. MUNHOZ", "…
summary(qnat_posto)
##       date               code_stn       qnat           name_stn        
##  Min.   :1931-01-02   Min.   :74   Min.   :  69.58   Length:32141      
##  1st Qu.:1953-01-01   1st Qu.:74   1st Qu.: 300.02   Class :character  
##  Median :1975-01-01   Median :74   Median : 524.20   Mode  :character  
##  Mean   :1975-01-01   Mean   :74   Mean   : 730.61                     
##  3rd Qu.:1996-12-31   3rd Qu.:74   3rd Qu.: 958.57                     
##  Max.   :2018-12-31   Max.   :74   Max.   :7832.77                     
##                                    NA's   :13605
magnif7(stn_data = select(qnat_posto, date, qnat))
## # A tibble: 8 × 2
##   indice    statistic
##   <chr>         <dbl>
## 1 lam1        736.   
## 2 tau2          0.42 
## 3 tau3          0.35 
## 4 tau4          0.18 
## 5 ar1           0.982
## 6 amplitude     0.17 
## 7 phase       262.   
## 8 bfi           0.413

Dados agrupados por postos

by_stn <- qnat_data %>% 
  group_by(code_stn) %>%
  nest()

Gráfico da sazonalidade da fração de vazão anual.

Fração mensal da vazão anual

# https://jcoliver.github.io/learn-r/008-ggplot-dendrograms-and-heatmaps.html
# glimpse(by_stn[["data"]][[1]])

q_mon_clim <- by_stn %>%
  unnest(cols = "data") %>%
  group_by(code_stn, month = lubridate::month(date)) %>%
  select(-name_stn) %>%
  summarise(q_med = mean(qnat, na.rm = TRUE))
## `summarise()` has grouped output by 'code_stn'. You can override using the
## `.groups` argument.
q_ann_clim <- q_mon_clim %>%
  summarise(q_tot = sum(q_med))

q_mon_clim <- q_mon_clim %>% 
  full_join(q_ann_clim) %>%
  mutate(q_frac = q_med/q_tot * 100) %>%
  ungroup() %>%  
  mutate(code_stn = as.factor(code_stn),
         month = as.factor(month))
## Joining, by = "code_stn"
psych::describe(q_mon_clim) %>% 
  relocate(skew, kurtosis)
##           skew kurtosis vars    n     mean       sd  median trimmed     mad
## code_stn* 0.00    -1.20    1 1044    44.00    25.13   44.00   44.00   32.62
## month*    0.00    -1.22    2 1044     6.50     3.45    6.50    6.50    4.45
## q_med     6.10    46.86    3 1044  1192.13  3189.00  246.79  480.65  294.39
## q_tot     4.34    21.38    4 1044 14305.57 32676.29 3515.14 6265.35 3980.50
## q_frac    0.59    -0.32    5 1044     8.33     3.81    7.44    8.08    3.79
##              min       max     range      se
## code_stn*   1.00     87.00     86.00    0.78
## month*      1.00     12.00     11.00    0.11
## q_med       6.13  35961.74  35955.61   98.70
## q_tot     139.20 226833.52 226694.31 1011.31
## q_frac      1.11     20.30     19.19    0.12

Agrupamento dos postos

qlong <- select(q_mon_clim, code_stn, month, q_frac) %>%
  pivot_wider(names_from = "month", 
              values_from = "q_frac",
              names_prefix = "qfrac_"
              )
qlong_scaled <- qlong 
qlong_scaled[,-1] <- scale(qlong_scaled[,-1])

# Run clustering
qmat <- as.matrix(qlong_scaled[, -1])
rownames(qmat) <- qlong_scaled$code_stn

clustd <- dist(x = qmat)
cd <- hclust(d = clustd, method="ward.D2")
#cd <- hclust(d = dist(x = qmat))
# optclus <- sapply(2:20, 
#                    function(i) 
#                      summary(cluster::silhouette(cutree(cd, k = i), clustd))$avg.width
#                    )
# optclus
# which.max(optclus) # 2
q_dendro <- as.dendrogram(cd)

# Create dendro
dendro_plot <- ggdendro::ggdendrogram(data = q_dendro, rotate = TRUE)+
   theme(axis.text.y = element_text(size = 8))

# Preview the plot
dendro_plot

# linhas da ordem dos postos
q_order <- order.dendrogram(q_dendro)
# ordem dos postos
as.integer(as.vector(qlong_scaled$code_stn[q_order]))
##  [1]  73  71  72  77  78  74  76 111  93 220 102 286 101 216  92  94  98 215 277
## [20] 287 275 279 291  99 295 281 296 115  61  63 158 145 190 247 149 119 161 259
## [39] 294 266 120 121 237 243 240 242  47 117 134 196 144 283 188 255 205 209  33
## [58]  31  32 201  25 206  24 169 251   1 211   6  14  17  18 246  34 245 197 125
## [77] 130 254 155 156 278 257 253 191 270 271 273

Heat map da fração mensal da vazão anual com postos ordenados pelo agrupamento.

q_mon_clim_trans <-  q_mon_clim %>%
  mutate(code_stn = factor(x = code_stn,
                               levels = qlong_scaled$code_stn[q_order], 
                               ordered = TRUE),
         q_frac = round((q_frac/2))*2
         )

# cbind(t = q_mon_clim_trans$q_frac, o = q_mon_clim$q_frac)
cols <- c("firebrick1", "lightpink3", "lightsteelblue3", 
          "cadetblue1", "cornflowerblue","blue", "navyblue")

q_mon_clim_trans %>%
  
  ggplot(aes(x = month, y = code_stn)) +
  geom_tile(aes(fill = q_frac), colour = "white") +
  #geom_tile(aes(fill = factor(q_frac)), colour = "white") +
  scale_x_discrete(expand = c(0,0)) +
  theme_bw() +
  #scale_fill_viridis_c()
  #scale_fill_viridis_c(guide = "legend")
  scale_fill_gradientn( "Vazão (% da média anual)",
                       colours = cols,
                       #guide = "legend",
                       breaks = seq(0, 20, by = 2)
                       ) +
  #scale_fill_manual(values = cols) +
  theme(legend.title = element_text(angle = 90, vjust = 1),
        legend.key.height = unit(1.5, units = "cm")
        )

    # scale_fill_distiller(
    #                      palette='RdYlBu', 
    #                      direction = 1,
    #                      breaks = seq(0, 20, by = 2),
    #                      #guide = "legend"
    #                     )
library(heatmaply)
## Loading required package: plotly
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
## Loading required package: viridis
## Loading required package: viridisLite
## 
## Attaching package: 'viridis'
## The following object is masked from 'package:scales':
## 
##     viridis_pal
## 
## ======================
## Welcome to heatmaply version 1.3.0
## 
## Type citation('heatmaply') for how to cite the package.
## Type ?heatmaply for the main documentation.
## 
## The github page is: https://github.com/talgalili/heatmaply/
## Please submit your suggestions and bug-reports at: https://github.com/talgalili/heatmaply/issues
## You may ask questions at stackoverflow, use the r and heatmaply tags: 
##   https://stackoverflow.com/questions/tagged/heatmaply
## ======================
index_cols <- c(8:12, 1:7) + 1
qdata <- as.data.frame(qlong[, index_cols])
names(qdata) <- tolower(month.abb[index_cols-1])

lut_postos <- qnat_data %>% 
  select(contains("stn")) %>% 
  distinct() 
postos <- lut_postos$name_stn
postos <- setNames(postos, lut_postos$code_stn)
  
rownames(qdata) <- postos[as.character(qlong$code_stn)]

heatmaply(qdata, 
          k_row = 4, 
          Colv = FALSE,
          #k_col = 3, 
          #scale = "column"
          scale_fill_gradient_fun = scale_fill_gradientn( "Vazão \n (% da média anual)",
                       colours = cols,
                       #guide = "legend",
                       breaks = seq(0, 20, by = 2)
                       ),
          fontsize_row = 6
          #seriate = "mean",
          #seriate = "OLO"
          #seriate = "GW"
          #seriate = "none"
          )

Testando fasstr

https://bcgov.github.io/fasstr/articles/fasstr.html

Será relevante considerar a diferenças nos períodos dos anos hidrológicos por posto?

check_season_plots <- qnat_data %>%
  rename("Date" = date) %>%
  fasstr::plot_daily_stats(
    values = "qnat",
    groups = "code_stn",
    start_year = 1991,
    end_year = 2010,
    log_discharge = TRUE,
    add_year = 2001, 
    complete_years = TRUE, 
    include_title = TRUE
  )
codes <- names(check_season_plots) %>%
  readr::parse_number()

lookup <- qnat_data %>% 
  select(contains("stn")) %>% 
  distinct() %>%
  slice(order(codes)) %>%
  arrange(code_stn)

plot_l <- map(seq_along(codes), 
    function(i) {
      # i = 1
      nm <- filter(lookup, code_stn == codes[i]) %>%
        pull(name_stn)
      check_season_plots[[i]] + ggtitle(paste0("Posto: ", nm))
    })
plot_l

Assinaturas hidrológicas para todos postos

seven_stats <- by_stn %>%
  #ungroup() %>%
  mutate(stats = purrr::map(data, ~.x %>% select(date, qnat) %>% magnif7))
seven_stats_tidy <- seven_stats %>%
  select(stats) %>%
  unnest(cols = "stats") %>%
  pivot_wider(names_from = indice, values_from = statistic) %>%
  ungroup()
## Adding missing grouping variables: `code_stn`
seven_stats_tidy
## # A tibble: 87 × 9
##    code_stn   lam1  tau2  tau3  tau4   ar1 amplitude phase   bfi
##       <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>     <dbl> <dbl> <dbl>
##  1        1  128.   0.36  0.39  0.22 0.957      0.88  38.5 0.595
##  2        6  893.   0.37  0.36  0.19 0.974      0.91  41.8 0.570
##  3       14   52.8  0.38  0.37  0.21 0.944      0.86  44.4 0.551
##  4       17 1798.   0.35  0.34  0.17 0.985      0.96  48.2 0.603
##  5       18 2041.   0.34  0.33  0.16 0.986      0.97  49.3 0.609
##  6       24  454.   0.42  0.39  0.22 0.961      0.88  44.6 0.531
##  7       25  286.   0.37  0.37  0.21 0.943      0.88  46.8 0.577
##  8       31 1450.   0.38  0.35  0.19 0.975      0.95  49.0 0.568
##  9       32 1533.   0.37  0.35  0.18 0.976      0.96  49.5 0.571
## 10       33 2447.   0.36  0.32  0.16 0.978      1     50.5 0.587
## # … with 77 more rows
saveRDS(seven_stats_tidy, file = here("output", "seven_stats.RDS"))